#!/usr/bin/perl
use strict;
use warnings;

# Build a co-occurrence table from an n-gram count file.
#
# Input : one n-gram per line, fields separated by one or more spaces, with
#         the occurrence count as the final field (e.g. "w1 w2 w3 17").
# Output: one line per distinct "first-word last-word" pair, formatted as
#         "first last : total", sorted by ascending total. Pairs with equal
#         totals keep the order in which they were first seen in the input.
#
# Usage : script.pl [ngram_file [out_file]]
#         Both arguments are optional; the defaults below are used when omitted.
my $ngram_file = @ARGV > 0
	? $ARGV[0]
	: "C:\\DOCUME~1\\Administrator\\workspace\\smsindex\\data\\3gram.txt";
my $out_file = @ARGV > 1
	? $ARGV[1]
	: "C:\\DOCUME~1\\Administrator\\workspace\\smsindex\\data\\3gram_co_occur.txt";

# Three-argument open with lexical handles; report the path and the OS error.
open(my $ngram_fh, '<', $ngram_file)
	or die("can't open $ngram_file: $!");
open(my $out_fh, '>', $out_file)
	or die("can't open $out_file: $!");

my %count_of;        # "first last" => summed count
my %rank_of;         # "first last" => position of first appearance
my $next_rank = 0;

while (my $line = <$ngram_fh>) {
	chomp $line;
	my @fields = split / +/, $line;

	# Need at least a first word, a last word and a count; this also
	# skips blank lines.
	next if @fields < 3;

	my $count = pop @fields;
	my $last  = pop @fields;
	my $pair  = $fields[0] . " " . $last;

	if (exists $count_of{$pair}) {
		$count_of{$pair} += $count;
	}
	else {
		$count_of{$pair} = $count;
		$rank_of{$pair}  = $next_rank++;
	}
}
close($ngram_fh);

# Keep only pairs containing at least one word character, then order by
# total count with first-appearance rank as the tie-breaker.
my @sorted_pairs =
	sort { $count_of{$a} <=> $count_of{$b} || $rank_of{$a} <=> $rank_of{$b} }
	grep { /\w/ }
	keys %count_of;

for my $pair (@sorted_pairs) {
	print {$out_fh} $pair . " : " . $count_of{$pair} . "\n";
}

# Buffered write errors only surface at close, so check it.
close($out_fh) or die("can't close $out_file: $!");